#!/bin/bash -p

# EMBA - EMBEDDED LINUX ANALYZER
#
# Copyright 2020-2025 Siemens Energy AG
#
# EMBA comes with ABSOLUTELY NO WARRANTY. This is free software, and you are
# welcome to redistribute it under the terms of the GNU General Public License.
# See LICENSE file for usage of this software.
#
# EMBA is licensed under GPLv3
# SPDX-License-Identifier: GPL-3.0-only
#
# Author(s): Michael Messner

# Description:  This module tries to identify Linux kernel binaries and the init command line.
#               Identified kernel binaries are converted to ELF files with vmlinux-to-elf, and an
#               embedded kernel configuration is extracted and checked.

# Exported for code outside of this file - nothing in this module reads it.
# NOTE(review): presumably evaluated by the EMBA module scheduler to prioritise this module - confirm.
export THREAD_PRIO=1

# Module entry point.
# Starts one background binary_kernel_check_threader job for every unique line of
# ${P99_CSV_LOG} that is not tagged as ASCII/Unicode text, waits for all jobs,
# appends the per-job "threading_*.tmp" logs to ${LOG_FILE} and finally hands the
# line count of ${S24_CSV_LOG} to module_end_log.
S24_kernel_bin_identifier()
{
  module_log_init "${FUNCNAME[0]}"
  module_title "Kernel Binary and Configuration Identifier"
  pre_module_reporter "${FUNCNAME[0]}"

  local lNEG_LOG=0
  local lFILE_PATH="" lK_INIT="" lCFG_MD5=""
  local lK_INITS_ARR=()
  local lTMP_PID=""
  # this array is handed over by name to max_pids_protection - do not rename it
  local lWAIT_PIDS_S24_main=()
  export KCFG_MD5_ARR=()

  write_csv_log "file path" "Kernel version stripped" "file output" "identified init" "config extracted" "kernel symbols" "architecture" "endianness"

  # s09 usually creates the strings output directory - create it if it is missing
  [[ -d "${S09_LOG_DIR}"/strings_bins/ ]] || mkdir -p "${S09_LOG_DIR}"/strings_bins/ 2>/dev/null

  # one background job per candidate entry
  while read -r lBINARY_ENTRY; do
    binary_kernel_check_threader "${lBINARY_ENTRY}" &
    lTMP_PID="$!"
    lWAIT_PIDS_S24_main+=( "${lTMP_PID}" )
    max_pids_protection "${MAX_MOD_THREADS}" lWAIT_PIDS_S24_main
  done < <(grep -v "ASCII text\|Unicode text" "${P99_CSV_LOG}" | sort -u || true)

  wait_for_pid "${lWAIT_PIDS_S24_main[@]}"

  # collect the logs of all jobs in the module log
  # shellcheck disable=SC2153
  find "${LOG_PATH_MODULE}" -name "threading_*.tmp" -exec cat {} \; | tee -a "${LOG_FILE}"

  # a csv log which only holds the header line is removed again
  if [[ ! -f "${S24_CSV_LOG}" ]]; then
    :
  elif [[ $(wc -l < "${S24_CSV_LOG}") -le 1 ]]; then
    rm "${S24_CSV_LOG}"
  else
    lNEG_LOG=$(wc -l < "${S24_CSV_LOG}" 2>/dev/null || echo 0)
  fi

  module_end_log "${FUNCNAME[0]}" "${lNEG_LOG}"
}

# Worker for one P99 csv entry (started as background job by S24_kernel_bin_identifier).
# Searches the strings output of the file for the Linux kernel version identifiers
# configured in linux_kernel.json. For every hit it logs the finding, looks for init
# entries, tries to build an ELF file with vmlinux-to-elf, hands the version to
# version_parsing_logging and - if SBOM_MINIMAL is not enabled - extracts/checks the
# kernel configuration and writes the result via write_csv_log.
# All log output goes to ${LOG_PATH_MODULE}/threading_<basename>.tmp.
#
# Arguments:
#   $1 - one ';' separated P99 csv line: field 2 is used as file path,
#        field 8 as "file" output and field 9 as md5 checksum
binary_kernel_check_threader() {
  local lBINARY_ENTRY="${1:-}"

  local lFILE_PATH=""
  local lFILE_NAME=""

  lFILE_PATH=$(printf '%s\n' "${lBINARY_ENTRY}" | cut -d ';' -f2)
  lFILE_NAME=$(basename "${lFILE_PATH}")

  local lLOG_FILE="${LOG_PATH_MODULE}/threading_${lFILE_NAME}.tmp"
  local lBIN_FILE="NA"
  local lKCONFIG_EXTRACTED="NA"
  local lK_VER_CLEAN="NA"
  local lK_INIT="NA"
  local lK_SYMBOLS=0
  local lK_ARCH="NA"
  local lK_ARCH_END="NA"
  local lK_CON_DET=""
  local lK_FILE=""
  local lK_VER_TMP=""
  local lMD5_SUM=""
  local lBIN_NAME_REAL=""
  local lSTRINGS_OUTPUT=""
  local lVERSION_JSON_CFG=""
  local lVERSION_IDENTIFIER=""
  local lVERSION_IDENTIFIED=""
  local lVERSION_IDENTIFIED_ARR=()
  local lK_INITS_RAW_ARR=()
  local lK_INITS_ARR=()
  local lCFG_MD5=""
  local lCFG_CNT=0
  export PACKAGING_SYSTEM="linux_kernel"
  export TYPE="operating-system"

  lBIN_FILE=$(printf '%s\n' "${lBINARY_ENTRY}" | cut -d ';' -f8)
  if [[ "${lBIN_FILE}" == "empty" || "${lBIN_FILE}" == *"text"* || "${lBIN_FILE}" == *" archive "* || "${lBIN_FILE}" == *" compressed "* || "${lBIN_FILE}" == *" image data"* ]]; then
    return
  fi
  lMD5_SUM=$(printf '%s\n' "${lBINARY_ENTRY}" | cut -d ';' -f9)
  lBIN_NAME_REAL="$(basename "${lFILE_PATH}")"
  # usually we are using the strings generated by s09. Just in case we do not have them now we are starting to generate these
  # files now. We use the same destination directory as for s09.
  lSTRINGS_OUTPUT="${S09_LOG_DIR}"/strings_bins/strings_"${lMD5_SUM}"_"${lBIN_NAME_REAL}".txt
  if ! [[ -d "${S09_LOG_DIR}/strings_bins" ]]; then
    mkdir -p "${S09_LOG_DIR}/strings_bins"
  fi
  if ! [[ -f "${lSTRINGS_OUTPUT}" ]]; then
    strings "${lFILE_PATH}" | uniq > "${lSTRINGS_OUTPUT}" || true
  fi

  lVERSION_JSON_CFG="${CONFIG_DIR}/bin_version_identifiers/linux_kernel.json"

  local lPARSING_MODE_ARR=()
  local lRULE_IDENTIFIER=""
  local lLICENSES_ARR=()
  local lPRODUCT_NAME_ARR=()
  local lVENDOR_NAME_ARR=()
  local lCSV_REGEX_ARR=()
  local lVERSION_IDENTIFIER_ARR=()

  # shellcheck disable=SC2034
  mapfile -t lPARSING_MODE_ARR < <(jq -r '.parsing_mode[]' "${lVERSION_JSON_CFG}")
  lRULE_IDENTIFIER=$(jq -r '.identifier' "${lVERSION_JSON_CFG}" || print_error "[-] Error in parsing ${lVERSION_JSON_CFG}")
  # shellcheck disable=SC2034
  mapfile -t lLICENSES_ARR < <(jq -r '.licenses[]' "${lVERSION_JSON_CFG}" 2>/dev/null || true)
  # shellcheck disable=SC2034
  mapfile -t lPRODUCT_NAME_ARR < <(jq -r '.product_names[]' "${lVERSION_JSON_CFG}" 2>/dev/null || true)
  # shellcheck disable=SC2034
  mapfile -t lVENDOR_NAME_ARR < <(jq -r '.vendor_names[]' "${lVERSION_JSON_CFG}" 2>/dev/null || true)
  # shellcheck disable=SC2034
  mapfile -t lCSV_REGEX_ARR < <(jq -r '.version_extraction[]' "${lVERSION_JSON_CFG}" 2>/dev/null || true)
  mapfile -t lVERSION_IDENTIFIER_ARR < <(jq -r '.grep_commands[]' "${lVERSION_JSON_CFG}" 2>/dev/null || true)

  for lVERSION_IDENTIFIER in "${lVERSION_IDENTIFIER_ARR[@]}"; do
    mapfile -t lVERSION_IDENTIFIED_ARR < <(grep -a -o -E "${lVERSION_IDENTIFIER}" "${lSTRINGS_OUTPUT}"| sort -u || true)

    if [[ "${#lVERSION_IDENTIFIED_ARR[@]}" -gt 0 ]]; then
      write_log "" "${lLOG_FILE}"
      for lVERSION_IDENTIFIED in "${lVERSION_IDENTIFIED_ARR[@]}"; do
        write_log "[+] Possible Linux Kernel found: ${ORANGE}${lFILE_PATH} / ${lVERSION_IDENTIFIED}${NC}" "${lLOG_FILE}"
      done
      write_log "" "${lLOG_FILE}"

      # rough init entry detection
      mapfile -t lK_INITS_RAW_ARR < <(grep -E "init=\/" "${lSTRINGS_OUTPUT}" | sed 's/.*rdinit/rdinit/' | sed 's/.*\ init/init/' | awk '{print $1}' | tr -d '"' | sort -u || true)
      # only candidates which really look like an init entry are kept for the csv log.
      # Everything else is dropped and reported as "NA" later on if nothing valid is left
      lK_INITS_ARR=()
      for lK_INIT in "${lK_INITS_RAW_ARR[@]}"; do
        if [[ "${lK_INIT}" =~ init=\/.* ]]; then
          lK_INITS_ARR+=( "${lK_INIT}" )
          write_log "[+] Init found in Linux kernel file ${ORANGE}${lFILE_PATH}${NC}" "${lLOG_FILE}"
          write_log "" "${lLOG_FILE}"
          write_log "$(indent "$(orange "${lK_INIT}")")" "${lLOG_FILE}"
          write_log "" "${lLOG_FILE}"
        fi
      done

      # we test all possible kernel files with vmlinux-to-elf. It does not matter if it is already an elf file or not
      # if it is already an elf file we need the output for the module report
      if [[ -e "${EXT_DIR}"/vmlinux-to-elf/vmlinux-to-elf ]]; then
        write_log "[*] Testing possible Linux kernel file ${ORANGE}${lFILE_PATH}${NC} with ${ORANGE}vmlinux-to-elf:${NC}" "${lLOG_FILE}"
        write_log "" "${lLOG_FILE}"
        "${EXT_DIR}"/vmlinux-to-elf/vmlinux-to-elf "${lFILE_PATH}" "${lFILE_PATH}".elf 2>/dev/null >> "${lLOG_FILE}" || true
        if [[ -f "${lFILE_PATH}".elf ]]; then
          lMD5_SUM=$(md5sum "${lFILE_PATH}".elf)
          # md5sum prints "<checksum>  <file>" - keep the checksum only
          lMD5_SUM="${lMD5_SUM%% *}"
          if ! grep -q "${lMD5_SUM}" "${P99_CSV_LOG}"; then
            # we need to add our elf file to our main p99 csv file:
            binary_architecture_threader "${lFILE_PATH}.elf" "${FUNCNAME[0]}"
            lBINARY_ENTRY="$(grep -F "${lFILE_PATH}.elf" "${P99_CSV_LOG}" | sort -u | head -1 || true)"
          else
            # there is already an entry available in our P99 csv log -> we extract this one
            lBINARY_ENTRY="$(grep "${lMD5_SUM}" "${P99_CSV_LOG}" | sort -u | head -1 || true)"
          fi
          lBIN_FILE=$(printf '%s\n' "${lBINARY_ENTRY}" | cut -d ';' -f8)

          if [[ "${lBIN_FILE}" == *"ELF"* ]]; then
            write_log "" "${lLOG_FILE}"
            write_log "[+] Successfully generated Linux kernel elf file: ${ORANGE}${lFILE_PATH}.elf${NC}" "${lLOG_FILE}"
            export CONFIDENCE_LEVEL=4
            for lVERSION_IDENTIFIED in "${lVERSION_IDENTIFIED_ARR[@]}"; do
              version_parsing_logging "${S09_CSV_LOG}" "S24_kernel_bin_identifier" "${lVERSION_IDENTIFIED}" "${lBINARY_ENTRY}" "${lRULE_IDENTIFIER}" "lVENDOR_NAME_ARR" "lPRODUCT_NAME_ARR" "lLICENSES_ARR" "lCSV_REGEX_ARR"
            done
            # from now on we can work with our generated elf file
            lFILE_PATH+=".elf"
          else
            write_log "" "${lLOG_FILE}"
            write_log "[-] No Linux kernel elf file was created." "${lLOG_FILE}"
          fi
        fi
        write_log "" "${lLOG_FILE}"
      fi

      # if we have no elf file created and logged we now log the original kernel
      # in case we have an elf file lFILE_PATH was already included in the SBOM
      if [[ ! -f "${lFILE_PATH}.elf" ]] && [[ "${lBIN_FILE}" != *"ELF"* ]]; then
        for lVERSION_IDENTIFIED in "${lVERSION_IDENTIFIED_ARR[@]}"; do
          if version_parsing_logging "${S09_CSV_LOG}" "S24_kernel_bin_identifier" "${lVERSION_IDENTIFIED}" "${lBINARY_ENTRY}" "${lRULE_IDENTIFIER}" "lVENDOR_NAME_ARR" "lPRODUCT_NAME_ARR" "lLICENSES_ARR" "lCSV_REGEX_ARR"; then
            # print_output "[*] back from logging for ${lVERSION_IDENTIFIED} for non ELF kernel -> continue to next binary"
            return
          fi
        done
      fi

      # ensure this is only done in non SBOM_MINIMAL mode
      if [[ "${SBOM_MINIMAL:-0}" -eq 0 ]] ; then
        # we are using lBINARY_ENTRY which is already populated with our ELF data
        lK_FILE=$(printf '%s\n' "${lBINARY_ENTRY}" | cut -d ';' -f8)

        if [[ "${lK_FILE}" == *"ELF"* ]]; then
          lK_SYMBOLS="$(readelf -W -s "${lFILE_PATH}" | grep -c "FUNC\|OBJECT" || true)"

          [[ "${lK_FILE}" == *"LSB"* ]] && lK_ARCH_END="EL"
          [[ "${lK_FILE}" == *"MSB"* ]] && lK_ARCH_END="EB"

          [[ "${lK_FILE}" == *"MIPS"* ]] && lK_ARCH="MIPS"
          [[ "${lK_FILE}" == *"ARM"* ]] && lK_ARCH="ARM"
          [[ "${lK_FILE}" == *"80386"* ]] && lK_ARCH="x86"
          [[ "${lK_FILE}" == *"x86-64"* ]] && lK_ARCH="x64"
          [[ "${lK_FILE}" == *"PowerPC"* ]] && lK_ARCH="PPC"
          [[ "${lK_FILE}" == *"UCB RISC-V"* ]] && lK_ARCH="RISCV"
          [[ "${lK_FILE}" == *"QUALCOMM DSP6"* ]] && lK_ARCH="QCOM_DSP6"
        else
          # fallback: the vmlinux-to-elf output of this job was written to the job log (lLOG_FILE).
          # The module log (LOG_FILE) is only filled after all jobs are finished.
          lK_ARCH=$(grep "Guessed architecture" "${lLOG_FILE}" | cut -d: -f2 | awk '{print $1}' | sort -u || true)
          [[ "${lK_ARCH: -2}" == "le" ]] && lK_ARCH_END="EL"
          [[ "${lK_ARCH: -2}" == "be" ]] && lK_ARCH_END="EB"
          # nothing guessed -> back to our default marker
          lK_ARCH="${lK_ARCH:-NA}"
        fi

        disable_strict_mode "${STRICT_MODE}" 0
        extract_kconfig "${lFILE_PATH}" "${lLOG_FILE}"
        lKCONFIG_EXTRACTED="${KCONFIG_EXTRACTED}"
        enable_strict_mode "${STRICT_MODE}" 0

        lCFG_CNT=0
        if [[ -f "${lKCONFIG_EXTRACTED}" ]]; then
          # grep -c exits with 1 on zero matches - this must not stop us in strict mode
          lCFG_CNT=$(grep -c CONFIG_ "${lKCONFIG_EXTRACTED}" || true)
        fi
        # double check we really have a Kernel config extracted
        # without more than 50 config entries this is probably something different
        if [[ -f "${lKCONFIG_EXTRACTED}" ]] && [[ "${lCFG_CNT}" -gt 50 ]]; then
          write_log "[+] Extracted kernel configuration (${ORANGE}${lCFG_CNT} configuration entries${GREEN}) from ${ORANGE}$(basename "${lFILE_PATH}")${NC}" "${lLOG_FILE}"
          write_link "${lKCONFIG_EXTRACTED}" "${lLOG_FILE}"
          check_kconfig "${lKCONFIG_EXTRACTED}" "${lK_ARCH}" "${lLOG_FILE}"
        else
          write_log "[-] No valid kernel configuration extracted from ${ORANGE}$(basename "${lFILE_PATH}")${NC}" "${lLOG_FILE}"
          write_link "${lKCONFIG_EXTRACTED}" "${lLOG_FILE}"
          # an invalid configuration is never reported in the csv log (with or without init entries)
          lKCONFIG_EXTRACTED="NA"
        fi

        for lVERSION_IDENTIFIED in "${lVERSION_IDENTIFIED_ARR[@]}"; do
          # print_output "[*] Check for ELF - ${lBINARY_ENTRY}"

          lK_VER_TMP="${lVERSION_IDENTIFIED/Linux version /}"
          demess_kv_version "${lK_VER_TMP}"
          # -> KV_ARR
          # we should only get one element back, but as array
          for lK_VER_CLEAN in "${KV_ARR[@]}"; do
            if [[ "${#lK_INITS_ARR[@]}" -gt 0 ]]; then
              for lK_INIT in "${lK_INITS_ARR[@]}"; do
                write_csv_log "${lFILE_PATH}" "${lK_VER_CLEAN}" "${lBIN_FILE:-NA}" "${lK_INIT}" "${lKCONFIG_EXTRACTED}" "${lK_SYMBOLS}" "${lK_ARCH}" "${lK_ARCH_END}"
              done
            else
              write_csv_log "${lFILE_PATH}" "${lK_VER_CLEAN}" "${lBIN_FILE:-NA}" "NA" "${lKCONFIG_EXTRACTED}" "${lK_SYMBOLS}" "${lK_ARCH}" "${lK_ARCH_END}"
            fi
          done
        done
      fi
    # ASCII kernel config files:
    elif file -b "${lFILE_PATH}" | grep -q "ASCII"; then
      # ensure this is only done in non SBOM_MINIMAL mode
      if [[ "${SBOM_MINIMAL:-0}" -eq 0 ]] ; then
        lCFG_MD5=$(md5sum "${lFILE_PATH}" | awk '{print $1}')
        if [[ ! " ${KCFG_MD5_ARR[*]} " =~ ${lCFG_MD5} ]]; then
          lK_CON_DET=$(grep -E "^# Linux.*[0-9]{1}\.[0-9]{1,2}\.[0-9]{1,2}.* Kernel Configuration" "${lSTRINGS_OUTPUT}" || true)
          if [[ "${lK_CON_DET}" =~ \ Kernel\ Configuration ]]; then
            write_log "" "${lLOG_FILE}"
            write_log "[+] Found kernel configuration file: ${ORANGE}${lFILE_PATH}${NC}" "${lLOG_FILE}"
            check_kconfig "${lFILE_PATH}" "NA" "${lLOG_FILE}"
            KCFG_MD5_ARR+=("${lCFG_MD5}")
          fi
        fi
      fi
    fi
  done
}

# Tries to extract an embedded kernel configuration from a kernel image.
# On success dump_config stores the path of the extracted file in the exported
# variable KCONFIG_EXTRACTED, otherwise KCONFIG_EXTRACTED stays empty.
#
# Arguments:
#   $1 - kernel image to analyze (exported as IMG for dump_config/try_decompress)
#   $2 - log file which is handed over to write_log
# Globals written: IMG, KCONFIG_EXTRACTED, CF1, CF2, TMP1, TMP2 and the EXIT trap
extract_kconfig() {
  # Source: https://raw.githubusercontent.com/torvalds/linux/master/scripts/extract-ikconfig
  # # extract-ikconfig - Extract the .config file from a kernel image
  #
  # This will only work when the kernel was compiled with CONFIG_IKCONFIG.
  #
  # The obscure use of the "tr" filter is to work around older versions of
  # "grep" that report the byte offset of the line instead of the pattern.
  #
  # (c) 2009,2010 Dick Streefland <dick@streefland.net>
  # Licensed under the terms of the GNU General Public License.

  # Check invocation:
  export IMG="${1:-}"
  local lLOG_FILE="${2:-}"

  export KCONFIG_EXTRACTED=""

  if ! [[ -f "${IMG}" ]]; then
    write_log "[-] No kernel file to analyze here - ${ORANGE}${IMG}${NC}" "${lLOG_FILE}"
    return
  fi

  write_log "[*] Trying to extract kernel configuration from ${ORANGE}${IMG}${NC}" "${lLOG_FILE}"

  export CF1='IKCFG_ST\037\213\010'
  export CF2='0123456789'

  # Prepare temp files:
  # This function runs inside of background jobs (subshells) where $$ is still the pid of
  # the parent shell. BASHPID is the pid of the current job and keeps the temp files of
  # parallel jobs apart.
  export TMP1="${TMP_DIR}/ikconfig${BASHPID}.1"
  export TMP2="${TMP_DIR}/ikconfig${BASHPID}.2"
  # the trap command is expanded on exit - the quoting keeps paths with blanks intact
  trap 'rm -f -- "${TMP1}" "${TMP2}"' 0

  # A return value of 4 from dump_config/try_decompress means we are done with this image

  # Initial attempt for uncompressed images or objects:
  dump_config "${IMG}" "${lLOG_FILE}"
  [[ $? -eq 4 ]] && return

  # That didn't work, so retry after decompression.
  try_decompress '\037\213\010' xy    gunzip "${lLOG_FILE}"
  [[ $? -eq 4 ]] && return

  try_decompress '\3757zXZ\000' abcde unxz "${lLOG_FILE}"
  [[ $? -eq 4 ]] && return

  try_decompress 'BZh'          xy    bunzip2 "${lLOG_FILE}"
  [[ $? -eq 4 ]] && return

  try_decompress '\135\0\0\0'   xxx   unlzma "${lLOG_FILE}"
  [[ $? -eq 4 ]] && return

  try_decompress '\211\114\132' xy    'lzop -d' "${lLOG_FILE}"
  [[ $? -eq 4 ]] && return

  try_decompress '\002\041\114\030' xyy 'lz4 -d -l' "${lLOG_FILE}"
  [[ $? -eq 4 ]] && return

  try_decompress '\050\265\057\375' xxx unzstd "${lLOG_FILE}"
  [[ $? -eq 4 ]] && return
}

# Searches a file for the IKCFG_ST marker followed by gzip data and unpacks the
# data to ${TMP1}. An unpacked configuration with an unknown md5 checksum is copied
# to ${LOG_PATH_MODULE}/kernel_config_extracted_<basename>.log, the path is stored
# in KCONFIG_EXTRACTED and the checksum is appended to KCFG_MD5_ARR.
#
# Arguments:
#   $1 - file to search
#   $2 - log file which is handed over to write_log
# Returns:
#   4 if a configuration was unpacked (new or already known checksum)
dump_config() {
  # Source: https://raw.githubusercontent.com/torvalds/linux/master/scripts/extract-ikconfig
  local lIMG_="${1:-}"
  local lLOG_FILE="${2:-}"
  local lCFG_MD5=""
  local lUNPACK_RC=0

  if [[ ! -f "${lIMG_}" ]]; then
    write_log "[-] No kernel file to analyze here - ${ORANGE}${lIMG_}${NC}" "${lLOG_FILE}"
    return
  fi

  # the tr filter turns the marker into a line that starts with ${CF2} - grep reports its byte offset
  POS=$(tr "${CF1}\n${CF2}" "\n${CF2}=" < "${lIMG_}" | grep -abo "^${CF2}") || return 0
  POS=${POS%%:*}

  # skip the 8 byte marker and unpack the gzip stream behind it
  tail -c+"$((POS + 8))" "${lIMG_}" | zcat > "${TMP1}" 2> /dev/null
  lUNPACK_RC=$?

  # exit status must be 0 or 2 (trailing garbage warning)
  if [[ ${lUNPACK_RC} == 1 ]]; then
    return 0
  fi

  [[ "${STRICT_MODE}" -eq 1 ]] && set +e

  [[ -f "${TMP1}" ]] || return 0

  lCFG_MD5=$(md5sum "${TMP1}" | awk '{print $1}')

  # return value of 4 means we are done and we are going back to the main function of this module for the next file
  if [[ " ${KCFG_MD5_ARR[*]} " =~ ${lCFG_MD5} ]]; then
    write_log "[*] Firmware binary ${ORANGE}${IMG}${NC} already analyzed .. skipping" "${lLOG_FILE}"
    return 4
  fi

  KCONFIG_EXTRACTED="${LOG_PATH_MODULE}/kernel_config_extracted_$(basename "${lIMG_}").log"
  cp "${TMP1}" "${KCONFIG_EXTRACTED}"
  KCFG_MD5_ARR+=("${lCFG_MD5}")
  return 4
}

# Searches ${IMG} for a compression magic, decompresses the data starting at every
# hit to ${TMP2} and lets dump_config search the result for a kernel configuration.
#
# Arguments:
#   $1 - magic bytes of the compression format (tr notation, e.g. '\037\213\010')
#   $2 - replacement characters for the tr/grep search (one per magic byte)
#   $3 - decompression command reading stdin and writing stdout; may include
#        options separated by blanks (e.g. 'lzop -d')
#   $4 - log file which is handed over to dump_config
# Returns:
#   4 if dump_config reported a configuration, 0 otherwise
try_decompress() {
  # Source: https://raw.githubusercontent.com/torvalds/linux/master/scripts/extract-ikconfig
  local lMAGIC="${1:-}"
  local lSUBST="${2:-}"
  local lDECOMPRESSOR="${3:-}"
  local lLOG_FILE="${4:-}"
  local lDECOMP_CMD_ARR=()

  # split command and options - a quoted "${3}" would be searched as one single command name
  IFS=' ' read -r -a lDECOMP_CMD_ARR <<< "${lDECOMPRESSOR}"
  if [[ "${#lDECOMP_CMD_ARR[@]}" -eq 0 ]]; then
    return 0
  fi

  export POS=""
  for POS in $(tr "${lMAGIC}\n${lSUBST}" "\n${lSUBST}=" < "${IMG}" | grep -abo "^${lSUBST}"); do
    # grep reports "<offset>:<match>" - we only need the offset
    POS=${POS%%:*}
    tail -c+"${POS}" "${IMG}" | "${lDECOMP_CMD_ARR[@]}" > "${TMP2}" 2> /dev/null
    dump_config "${TMP2}" "${lLOG_FILE}"
    [[ $? -eq 4 ]] && return 4
  done
  return 0
}

# Runs kernel-hardening-checker against a kernel configuration file, stores its
# output in ${LOG_PATH_MODULE}/kconfig_hardening_check_<basename>_<random>.log and
# reports the number of "FAIL: " lines via write_log.
#
# Arguments:
#   $1 - kernel configuration file
#   $2 - architecture of the kernel (anything containing "mips" is skipped)
#   $3 - log file which is handed over to write_log/write_link
check_kconfig() {
  local lKCONFIG_FILE="${1:-}"
  local lKCONFIG_ARCH="${2:-}"
  local lLOG_FILE="${3:-}"

  local lCHECKER_BIN="${EXT_DIR}/kconfig-hardened-check/bin/kernel-hardening-checker"
  local lRESULT_LOG=""
  local lFAIL_CNT=""

  # without the checker there is nothing to do
  if [[ ! -e "${lCHECKER_BIN}" ]]; then
    write_log "[-] Kernel config hardening checker not found" "${lLOG_FILE}"
    return
  fi

  [[ -f "${lKCONFIG_FILE}" ]] || return 0

  case "${lKCONFIG_ARCH,,}" in
    *"mips"*)
      write_log "[-] Architecture ${ORANGE}${lKCONFIG_ARCH}${NC} not supported by ${ORANGE}kernel-hardening-checker${NC}." "${lLOG_FILE}"
      return
      ;;
  esac

  write_log "[*] Testing kernel configuration file ${ORANGE}${lKCONFIG_FILE}${NC} with kconfig-hardened-check (architecture ${lKCONFIG_ARCH})." "${lLOG_FILE}"
  lRESULT_LOG="${LOG_PATH_MODULE}/kconfig_hardening_check_$(basename "${lKCONFIG_FILE}")_${RANDOM}.log"
  "${lCHECKER_BIN}" -c "${lKCONFIG_FILE}" | tee -a "${lRESULT_LOG}" || true

  # no result log, no statistics
  [[ -f "${lRESULT_LOG}" ]] || return 0

  lFAIL_CNT=$(grep -c "FAIL: " "${lRESULT_LOG}" || true)
  [[ "${lFAIL_CNT}" -gt 0 ]] || return 0

  write_log "[+] Found ${ORANGE}${lFAIL_CNT}${GREEN} security related kernel settings which should be reviewed - ${ORANGE}$(print_path "${lKCONFIG_FILE}")${NC}" "${lLOG_FILE}"
  write_link "${lRESULT_LOG}" "${lLOG_FILE}"
  write_log "" "${lLOG_FILE}"
  write_log "[*] Statistics:${lFAIL_CNT}" "${lLOG_FILE}"
}
